PART A

Libraries Session

In [1]:
%tensorflow_version 2.x
import tensorflow
tensorflow.__version__
Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.
Out[1]:
'2.8.2'
In [2]:
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
In [3]:
import random
random.seed(0)

# Ignore the warnings
import warnings
warnings.filterwarnings("ignore")
import os
import numpy as np
import pandas as pd
import cv2
from glob import glob
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from tensorflow.keras.layers import Conv2D, Activation, BatchNormalization
from tensorflow.keras.layers import UpSampling2D, Input, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.metrics import Recall, Precision
from tensorflow.keras import backend as K
from PIL import Image
from numpy import asarray

PROJECT OBJECTIVE: To build a face detection system.

A. Import and read ‘images.npy’.

In [ ]:
# Loading the images file

data = np.load('/content/drive/MyDrive/images(1).npy', allow_pickle=True)
In [ ]:
data.shape

The file contains 409 images and their labels. Let's view a few images and their labels.

In [ ]:
data[0][0]
In [ ]:
data[408][1]

B. Split the data into features (X) and labels (Y). Unify the shape of all the images.

In [ ]:
from tensorflow.keras.applications.mobilenet import preprocess_input

IMAGE_HEIGHT = 224
IMAGE_WIDTH = 224

HEIGHT_CELLS = 28
WIDTH_CELLS = 28

IMAGE_SIZE = 224

masks = np.zeros((int(data.shape[0]), IMAGE_HEIGHT, IMAGE_WIDTH))
X = np.zeros((int(data.shape[0]),IMAGE_HEIGHT, IMAGE_WIDTH, 3))

for index in range(data.shape[0]):
  img = data[index][0]
  img = cv2.resize(img, dsize=(IMAGE_HEIGHT, IMAGE_WIDTH), interpolation=cv2.INTER_CUBIC)
  # assign all pixels in the first 3 channels only to the image, i.e., discard the alpha channel
  try:
    img = img[:, :, :3]
  except IndexError:
    print(f"Exception {index}: grayscale image with shape {img.shape}")
    # convert the grayscale image to color so that the number of channels is standardized to 3,
    # then fall through so the converted image still gets preprocessed and masked
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
  X[index] = preprocess_input(np.array(img, dtype=np.float32))
  # Loop through the face co-ordinates and create mask out of it.
  for i in data[index][1]:
    x1 = int(i['points'][0]['x'] * IMAGE_WIDTH)
    x2 = int(i['points'][1]['x'] * IMAGE_WIDTH)
    y1 = int(i['points'][0]['y'] * IMAGE_HEIGHT)
    y2 = int(i['points'][1]['y'] * IMAGE_HEIGHT)
    # set all pixels within the mask co-ordinates to 1.
    masks[index][y1:y2, x1:x2] = 1
print(f"### Shape of X is '{X.shape}' and the shape of mask is '{masks.shape}' ")

C. Split the data into train and test [400:9].

In [ ]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, masks, test_size=0.2)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.2)

print(f"Shape of X_train is '{X_train.shape}' and the shape of y_train is '{y_train.shape}'")
print(f"Shape of X_val is '{X_val.shape}' and the shape of y_val is '{y_val.shape}'")
print(f"Shape of X_test is '{X_test.shape}' and the shape of y_test is '{y_test.shape}'")

D. Select a random image from the train data and display the original image and the masked image.

In [ ]:
fig = plt.figure(figsize=(15, 15))
a = fig.add_subplot(1, 4, 1)
imgplot = plt.imshow(X_train[0])

a = fig.add_subplot(1, 4, 2)
imgplot = plt.imshow(X_train[10])
imgplot.set_clim(0.0, 0.7)

a = fig.add_subplot(1, 4, 3)
imgplot = plt.imshow(X_train[20])
imgplot.set_clim(0.0, 1.4)

a = fig.add_subplot(1, 4, 4)
imgplot = plt.imshow(X_train[30])
imgplot.set_clim(0.0, 2.1)

fig = plt.figure(figsize=(15, 15))
a = fig.add_subplot(1, 4, 1)
imgplot = plt.imshow(y_train[0])

a = fig.add_subplot(1, 4, 2)
imgplot = plt.imshow(y_train[10])
imgplot.set_clim(0.0, 0.7)

a = fig.add_subplot(1, 4, 3)
imgplot = plt.imshow(y_train[20])
imgplot.set_clim(0.0, 1.4)

a = fig.add_subplot(1, 4, 4)
imgplot = plt.imshow(y_train[30])
imgplot.set_clim(0.0, 1.4)

2. Model building

A. Design a face mask detection model.

In [ ]:
IMAGE_SIZE = 224
EPOCHS = 15
BATCH = 8
LR = 1e-4

def model():
    inputs = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3), name="input_image")
    
    encoder = MobileNetV2(input_tensor=inputs, weights="imagenet", include_top=False, alpha=0.35)
    skip_connection_names = ["input_image", "block_1_expand_relu", "block_3_expand_relu", "block_6_expand_relu"]
    encoder_output = encoder.get_layer("block_13_expand_relu").output
    
    f = [16, 32, 48, 64]
    x = encoder_output
    for i in range(1, len(skip_connection_names)+1, 1):
        x_skip = encoder.get_layer(skip_connection_names[-i]).output
        x = UpSampling2D((2, 2))(x)
        x = Concatenate()([x, x_skip])
        
        x = Conv2D(f[-i], (3, 3), padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)
        
        x = Conv2D(f[-i], (3, 3), padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)
        
    x = Conv2D(1, (1, 1), padding="same")(x)
    x = Activation("sigmoid")(x)
    
    model = Model(inputs, x)
    return model
In [ ]:
model = model()
model.summary()

B. Design your own Dice coefficient and loss function.

In [ ]:
smooth = 1e-15
def dice_coef(y_true, y_pred):
    y_true = tf.keras.layers.Flatten()(y_true)
    y_pred = tf.keras.layers.Flatten()(y_pred)
    intersection = tf.reduce_sum(y_true * y_pred)
    return (2. * intersection + smooth) / (tf.reduce_sum(y_true) + tf.reduce_sum(y_pred) + smooth)

def dice_loss(y_true, y_pred):
    return 1.0 - dice_coef(y_true, y_pred)
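
As a quick sanity check of these functions: for a prediction that covers exactly half of the ground-truth mask, the Dice coefficient should be about 2·1/(2+1) ≈ 0.67 and the Dice loss about 0.33. A minimal toy example (the tensors below are illustrative, not part of the dataset):

In [ ]:
# Toy sanity check: the prediction overlaps half of the true mask,
# so dice_coef should return roughly 0.67 and dice_loss roughly 0.33.
y_true_toy = tf.constant([[1.0, 1.0, 0.0, 0.0]])
y_pred_toy = tf.constant([[1.0, 0.0, 0.0, 0.0]])
print(dice_coef(y_true_toy, y_pred_toy).numpy())
print(dice_loss(y_true_toy, y_pred_toy).numpy())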

Compiling the Model

In [ ]:
opt = tf.keras.optimizers.Nadam(LR)
metrics = [dice_coef, Recall(), Precision()]
model.compile(loss=dice_loss, optimizer=opt, metrics=metrics)
In [ ]:
callbacks = [
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=4),
    EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=False)
]

C. Train and tune the model as required.

Training the Model

In [ ]:
train_steps = len(X_train)//BATCH
valid_steps = len(X_val)//BATCH

if len(X_train) % BATCH != 0:
    train_steps += 1
if len(X_val) % BATCH != 0:
    valid_steps += 1

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    steps_per_epoch=train_steps,
    validation_steps=valid_steps,
    callbacks=callbacks
)

D. Evaluate and share insights on the performance of the model.

In [ ]:
test_steps = (len(X_test)//BATCH)
if len(X_test) % BATCH != 0:
    test_steps += 1

model.evaluate(X_test, y_test, steps=test_steps)

The model has a precision of 65% and a recall of 62%. The Dice loss is 0.54 and the Dice coefficient is 0.44.

3. Test the model predictions on the test image (the image with index 3 in the test data) and visualise the predicted masks on the faces in the image.

In [ ]:
# X_test was already resized and passed through preprocess_input when X was built,
# so the test image should not be preprocessed a second time here.
image = cv2.resize(X_test[3], (IMAGE_WIDTH, IMAGE_HEIGHT))
feat_scaled = np.array(image, dtype=np.float32)
In [ ]:
feat_scaled
In [ ]:
y_pred = model.predict(np.array([feat_scaled]))
y_pred
In [ ]:
pred_mask = cv2.resize((1.0*(y_pred[0]>0.5)), (IMAGE_WIDTH,IMAGE_HEIGHT))
In [ ]:
pred_mask
In [ ]:
from google.colab.patches import cv2_imshow
In [ ]:
# cv2_imshow((feat_scaled).astype(np.uint8))
cv2_imshow((feat_scaled).astype(np.uint8))
In [ ]:
# plt.imshow((pred_mask))
plt.imshow((pred_mask).astype(np.uint16))
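
To visualise the predicted mask on the face itself, the binary mask can be overlaid on the test image. A minimal sketch, assuming feat_scaled still holds the preprocessed test image (in the [-1, 1] range produced by preprocess_input) and pred_mask the thresholded prediction from the cells above:

In [ ]:
# Overlay the predicted face mask on the test image (rough visualisation sketch).
# preprocess_input scales pixels to [-1, 1], so map them back to [0, 255] for display.
display_img = ((feat_scaled + 1.0) * 127.5).astype(np.uint8)
overlay = display_img.copy()
overlay[pred_mask > 0.5] = (255, 0, 0)   # paint the predicted face region red
blended = cv2.addWeighted(display_img, 0.6, overlay, 0.4, 0)
plt.figure(figsize=(6, 6))
plt.imshow(blended)
plt.axis("off")
plt.show()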

PART B

In [30]:
from tqdm.notebook import trange, tqdm
from IPython.display import Image, display, Markdown, clear_output
from zipfile import ZipFile

1. Read/import images from the folder ‘training_images’.

In [31]:
project_path = '/content/drive/MyDrive/'
image_files = 'training_images-20211126T092819Z-001.zip'
In [32]:
images_zip_path = os.path.join(project_path, image_files)

with ZipFile(images_zip_path, 'r') as z:
  z.extractall()
In [33]:
## Get the Unzipped Location in the drive

zip_dir_loc = z.filelist[0].filename.split("/")[0] 
zip_dir_loc
Out[33]:
'training_images'
In [34]:
raw_img_file_names = [os.path.join(zip_dir_loc,i) for i in os.listdir(zip_dir_loc)]
raw_img_file_names[:5]
Out[34]:
['training_images/real_00306.jpg',
 'training_images/real_00155.jpg',
 'training_images/real_00574.jpg',
 'training_images/real_00993.jpg',
 'training_images/real_00585.jpg']
In [35]:
#Reading the images 

img_list = []
for imgs in tqdm(raw_img_file_names):
  tst_img = cv2.imread(imgs)
  img_list.append(tst_img)
img_list = np.array(img_list)
display(Markdown(f"#### {img_list.shape}"))

(1091, 600, 600, 3)

In [36]:
# Viewing the first few images

from google.colab.patches import cv2_imshow

for i in img_list[:5,]:
  cv2_imshow(cv2.resize(i,(224,224)))
In [37]:
#Defining a function to create bounding boxes
def test_bb(df,fname,title=""):

  tst_img = cv2.imread(fname)
  temp_df = df[df['Image_Name'] == fname]
  rect_img = []
  for rows in temp_df.index:
    x = df['x'][rows]
    y = df['y'][rows]
    w = df['w'][rows]
    h = df['h'][rows]
    cv2.rectangle(tst_img,(x,y),(x+w,y+h),(255,0,0),2)
    cv2.putText(tst_img, title, (int((x+w)*0.75),y-3),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,255))
  cv2_imshow(tst_img)
  
  return
In [38]:
#Defining a function to read images and resize them
def show_face(img_list,scale=1.0):

  for imgs in img_list:
    img = cv2.imread(imgs)
    img_w  = int(img.shape[1]*scale)
    img_h = int(img.shape[0]*scale)
    img = cv2.resize(img,(img_w,img_h))
    display(Markdown(f"#### {imgs}"))
    cv2_imshow(img)

  return
In [39]:
#Downloading the HAAR Model
!wget https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml
--2022-09-22 02:44:43--  https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.108.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 930127 (908K) [text/plain]
Saving to: ‘haarcascade_frontalface_default.xml’

haarcascade_frontal 100%[===================>] 908.33K  --.-KB/s    in 0.06s   

2022-09-22 02:44:43 (15.4 MB/s) - ‘haarcascade_frontalface_default.xml’ saved [930127/930127]

In [40]:
haar_img_box_df = pd.DataFrame(columns=['x','y','w','h','Total_Faces','Image_Name'])
haar_img_box_df
Out[40]:
x y w h Total_Faces Image_Name
In [41]:
# Detecting Faces using HAAR Model

face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

test_img = cv2.imread(raw_img_file_names[0])
grey = cv2.cvtColor(test_img, cv2.COLOR_BGR2GRAY)
# Detect faces
faces = face_cascade.detectMultiScale(grey,1.1,4)
# Draw rectangle around the faces
for (x, y, w, h) in faces:
  cv2.rectangle(test_img, (x, y), (x+w, y+h), (255, 0, 0), 2)
  cv2.putText(test_img, "HaarCascadeClassifier", (int((x+w)*0.75),y-3),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,255))

# Display the output
display(Markdown(f"### Bounding Box parameters are `x`:{x}, `y`:{y}, `width`:{w}, `height`:{h}"))
cv2_imshow(test_img)

Bounding Box parameters are x:100, y:199, width:350, height:350

In [42]:
%%time
# Detecting faces for all the images

haar_undetected_images = []
haar_detected_images = []

for imgs, fnames in tqdm(zip(img_list,raw_img_file_names)):
  gray = cv2.cvtColor(imgs,cv2.COLOR_BGR2GRAY)
  faces = face_cascade.detectMultiScale(gray,1.1,4)
  if len(faces) == 0:
    haar_undetected_images.append(fnames)
    temp_dict = {'x':0, 
                 'y':0, 
                 'w':-1,
                 'h':-1, 
                 'Total_Faces':0,
                 'Image_Name':fnames} 
  else:
    haar_detected_images.append(fnames)
    for (x,y,w,h) in faces:
      temp_dict = {'x':x, 
                  'y':y, 
                  'w':w,
                  'h':h, 
                  'Total_Faces':len(faces),
                  'Image_Name':fnames} 
      haar_img_box_df = haar_img_box_df.append(temp_dict,ignore_index=True)
display(Markdown(f"#### Detected faces for {len(haar_detected_images)} images"))
display(Markdown(f"#### Failed to detect faces for {len(haar_undetected_images)} images"))

Detected faces for 930 images

Failed to detect faces for 161 images

CPU times: user 6min 30s, sys: 3.23 s, total: 6min 33s
Wall time: 3min 30s
In [43]:
haar_img_box_df
Out[43]:
x y w h Total_Faces Image_Name
0 100 199 350 350 1 training_images/real_00306.jpg
1 96 176 389 389 1 training_images/real_00574.jpg
2 192 85 359 359 1 training_images/real_00993.jpg
3 107 152 403 403 1 training_images/real_00585.jpg
4 163 123 403 403 1 training_images/real_01039.jpg
... ... ... ... ... ... ...
1006 38 74 464 464 1 training_images/real_00487.jpg
1007 69 67 475 475 1 training_images/real_01079.jpg
1008 60 156 366 366 1 training_images/real_00243.jpg
1009 36 97 476 476 1 training_images/real_00328.jpg
1010 27 51 477 477 1 training_images/real_00272.jpg

1011 rows × 6 columns

In [44]:
haar_img_box_df[haar_img_box_df['Total_Faces'] > 1]
Out[44]:
x y w h Total_Faces Image_Name
6 68 220 56 56 3 training_images/real_00730.jpg
7 165 167 402 402 3 training_images/real_00730.jpg
8 81 249 139 139 3 training_images/real_00730.jpg
13 103 177 376 376 2 training_images/real_00606.jpg
14 19 298 88 88 2 training_images/real_00606.jpg
... ... ... ... ... ... ...
934 100 477 73 73 2 training_images/real_00047.jpg
962 36 349 104 104 2 training_images/real_00219.jpg
963 535 437 60 60 2 training_images/real_00219.jpg
992 240 340 35 35 2 training_images/real_00656.jpg
993 102 332 80 80 2 training_images/real_00656.jpg

156 rows × 6 columns

In [45]:
# Viewing samples of correctly and incorrectly detected faces using the HAAR Model

display(Markdown("### (1) Correctly detected 1 face"))
test_bb(haar_img_box_df,"training_images/real_00115.jpg",title="Haar")
display(Markdown("### (2) Incorrectly detected multiple faces"))
test_bb(haar_img_box_df,"training_images/real_00730.jpg",title="Haar")

(1) Correctly detected 1 face

(2) Incorrectly detected multiple faces

In [46]:
# Viewing images where the HAAR model failed to detect a face

show_face(haar_undetected_images[-5:],scale=0.4)

training_images/real_00319(1).jpg

training_images/real_01081.jpg

training_images/real_00990.jpg

training_images/real_00519.jpg

training_images/real_00910.jpg

In [47]:
# Downloading the MTCNN model to detect faces

!pip install mtcnn
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mtcnn
  Downloading mtcnn-0.1.1-py3-none-any.whl (2.3 MB)
     |████████████████████████████████| 2.3 MB 4.1 MB/s 
Requirement already satisfied: keras>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from mtcnn) (2.8.0)
Requirement already satisfied: opencv-python>=4.1.0 in /usr/local/lib/python3.7/dist-packages (from mtcnn) (4.6.0.66)
Requirement already satisfied: numpy>=1.14.5 in /usr/local/lib/python3.7/dist-packages (from opencv-python>=4.1.0->mtcnn) (1.21.6)
Installing collected packages: mtcnn
Successfully installed mtcnn-0.1.1
In [48]:
from mtcnn.mtcnn import MTCNN
mtcnn_det = MTCNN()
In [49]:
# Detecting faces using the MTCNN Model

mtcnn_tst_img = cv2.imread(raw_img_file_names[0])
mt_cvt = cv2.cvtColor(mtcnn_tst_img,cv2.COLOR_BGR2RGB)
mt_faces = mtcnn_det.detect_faces(mt_cvt)
for face in mt_faces:
  mt_x, mt_y,mt_w,mt_h = face['box']
  cv2.rectangle(mtcnn_tst_img,(mt_x,mt_y),(mt_x + mt_w,mt_y + mt_h),(255,0,0),2)
  cv2.putText(mtcnn_tst_img, "MTCNN", (int((mt_x+mt_w)*1),mt_y-3),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,255))
cv2_imshow(mtcnn_tst_img)
In [50]:
mtcnn_img_box_df = pd.DataFrame(columns=['x','y','w','h','Total_Faces','Image_Name'])
mtcnn_img_box_df
Out[50]:
x y w h Total_Faces Image_Name
In [51]:
%%time
mtcnn_undetected_images = []
mtcnn_detected_images = []
for imgs, fnames in tqdm(zip(img_list,raw_img_file_names)):
  cvt_img = cv2.cvtColor(imgs,cv2.COLOR_BGR2RGB)
  faces = mtcnn_det.detect_faces(cvt_img)
  if len(faces) == 0:
    mtcnn_undetected_images.append(fnames)
    temp_dict = {'x':0, 
                 'y':0, 
                 'w':-1,
                 'h':-1, 
                 'Total_Faces':0,
                 'Image_Name':fnames} 
  else:
    mtcnn_detected_images.append(fnames)
    for face in faces:
      temp_dict = {'x':face['box'][0], 
                  'y':face['box'][1], 
                  'w':face['box'][2],
                  'h':face['box'][3], 
                  'Total_Faces':len(faces),
                  'Image_Name':fnames} 
      mtcnn_img_box_df = mtcnn_img_box_df.append(temp_dict,ignore_index=True)

display(Markdown(f"#### Detected faces for {len(mtcnn_detected_images)} images"))
display(Markdown(f"#### Failed to detect faces for {len(mtcnn_undetected_images)} images"))

Detected faces for 1086 images

Failed to detect faces for 5 images

CPU times: user 16min 25s, sys: 28.6 s, total: 16min 54s
Wall time: 15min 21s
In [52]:
mtcnn_img_box_df
Out[52]:
x y w h Total_Faces Image_Name
0 133 142 313 429 1 training_images/real_00306.jpg
1 35 0 477 607 1 training_images/real_00155.jpg
2 124 168 305 401 1 training_images/real_00574.jpg
3 116 52 388 501 1 training_images/real_00993.jpg
4 123 121 344 469 1 training_images/real_00585.jpg
... ... ... ... ... ... ...
1098 66 0 427 567 1 training_images/real_00487.jpg
1099 104 44 405 564 1 training_images/real_01079.jpg
1100 135 142 301 415 1 training_images/real_00243.jpg
1101 72 41 444 539 1 training_images/real_00328.jpg
1102 64 56 422 492 1 training_images/real_00272.jpg

1103 rows × 6 columns

In [53]:
display(mtcnn_img_box_df[mtcnn_img_box_df['Total_Faces'] > 1])
display(Markdown(f"#### Number of images with more than 1 face detected : {len(mtcnn_img_box_df[mtcnn_img_box_df['Total_Faces'] > 1])}"))
x y w h Total_Faces Image_Name
38 175 130 309 441 2 training_images/real_00590.jpg
39 463 193 65 77 2 training_images/real_00590.jpg
48 95 91 366 432 2 training_images/real_00178.jpg
49 25 224 57 75 2 training_images/real_00178.jpg
101 139 148 317 433 2 training_images/real_00699.jpg
102 50 472 64 78 2 training_images/real_00699.jpg
158 142 115 351 476 2 training_images/real_01054.jpg
159 429 14 181 197 2 training_images/real_01054.jpg
204 101 64 368 484 2 training_images/real_00201.jpg
205 473 257 56 75 2 training_images/real_00201.jpg
333 103 65 361 491 2 training_images/real_00241.jpg
334 29 492 46 71 2 training_images/real_00241.jpg
423 56 72 429 491 2 training_images/real_00788.jpg
424 374 1 55 65 2 training_images/real_00788.jpg
444 170 184 321 426 2 training_images/real_00950.jpg
445 117 28 51 60 2 training_images/real_00950.jpg
474 147 152 296 390 2 training_images/real_00191.jpg
475 133 180 22 22 2 training_images/real_00191.jpg
492 145 130 329 430 2 training_images/real_00591.jpg
493 45 112 21 28 2 training_images/real_00591.jpg
503 151 149 387 449 2 training_images/real_00666.jpg
504 106 379 58 75 2 training_images/real_00666.jpg
542 133 121 319 417 2 training_images/real_00004.jpg
543 552 337 24 29 2 training_images/real_00004.jpg
563 119 158 317 429 2 training_images/real_00274.jpg
564 190 10 240 261 2 training_images/real_00274.jpg
720 69 26 472 597 2 training_images/real_01004.jpg
721 539 231 28 36 2 training_images/real_01004.jpg
900 93 134 348 393 2 training_images/real_00259.jpg
901 194 185 310 391 2 training_images/real_00259.jpg
911 109 79 366 487 2 training_images/real_01006.jpg
912 461 187 40 51 2 training_images/real_01006.jpg
1082 112 73 385 492 2 training_images/real_01058.jpg
1083 193 0 68 74 2 training_images/real_01058.jpg

Number of images with more than 1 face detected : 34

In [54]:
display(Markdown("### (1) Correctly detected 1 face"))
test_bb(mtcnn_img_box_df,"training_images/real_00115.jpg",title="MTCNN")
display(Markdown("### (2) Correctly detected one face and incorrectly the other one"))
test_bb(mtcnn_img_box_df,"training_images/real_00699.jpg",title="MTCNN")

(1) Correctly detected 1 face

(2) Correctly detected one face and incorrectly the other one

In [55]:
show_face(mtcnn_undetected_images,scale=0.4)

training_images/real_00692.jpg

training_images/real_00696.jpg

training_images/real_00951.jpg

training_images/real_00672.jpg

training_images/real_00818.jpg

Observation:

There are 5 images where a face could not be detected, due to:

  • The face being partially covered
  • The image being zoomed in so that part of the face is cropped
  • Poor illumination or partially lit areas of the face

MTCNN did detect faces in images with:

  • Tilted faces
  • Side (profile) faces
In [56]:
# Displaying the images in which neither the HAAR nor the MTCNN model detected a face

haar_set = set(haar_undetected_images)
mtcnn_set = set(mtcnn_undetected_images)
show_face(haar_set.intersection(mtcnn_set),0.4)

training_images/real_00951.jpg

training_images/real_00818.jpg

training_images/real_00692.jpg

Observation:

  • Performance-wise, the MTCNN model did the better job, detecting faces in all but 5 images (the HAAR cascade missed 161).
  • MTCNN is much more time-consuming than the HAAR model, so it is less suited to real-time applications where faces need to be detected quickly.

PART C

1. Unzip, read, and load the data (‘PINS.zip’) into the session. [2 Marks]

2. Write a function to create metadata for the images. [4 Marks] Hint: Metadata means information derived from the available data that is useful for the particular problem statement.

3. Write a loop to iterate through every image and create metadata for all the images. [4 Marks]

4. Generate embedding vectors for each face in the dataset. [4 Marks] Hint: Use ‘vgg_face_weights.h5’.

5. Build distance metrics for identifying the distance between two similar and two dissimilar images. [4 Marks]

6. Use PCA for dimensionality reduction. [2 Marks]

7. Build an SVM classifier in order to map each image to the right person. [4 Marks]

8. Import and display the test images. [2 Marks] Hint: ‘Benedict Cumberbatch9.jpg’ and ‘Dwayne Johnson4.jpg’ are the test images.

9. Use the trained SVM model to predict the person in both test images. [4 Marks]

In [4]:
cd '/content/drive/MyDrive/'
/content/drive/MyDrive
In [5]:
project_path = '/content/drive/MyDrive/'
In [6]:
# from zipfile import ZipFile

# # specifying the zip file name
# file_name = project_path + "PINS.zip"
  
# # opening the zip file in READ mode
# with ZipFile(file_name, 'r') as zip:
#     # printing all the contents of the zip file
#     # zip.printdir()
  
#     # extracting all the files
#     print('Extracting all the files now...')
#     zip.extractall()
#     print('Done!')
In [7]:
directory = '/content/drive/MyDrive/PINS'
In [8]:
# Renaming the class folders: keep only the person's name, in upper case with underscores

for filename in os.listdir(directory):
    # standardize the folder name: spaces -> underscores, upper case
    new_name = filename.replace(' ', '_').upper()
    # strip the 'PINS' prefix, leaving names like '_MARK_ZUCKERBERG'
    if new_name.startswith("PINS_"):
        new_name = new_name.replace("PINS", "", 1)
    os.rename(os.path.join(directory, filename), os.path.join(directory, new_name))

for subdir, dirs, files in os.walk(directory):
    for file in files:
      os.rename(os.path.join(subdir, file), os.path.join(subdir, file.replace(' ', '_')))
In [9]:
# Displaying all the labels

import glob

paths = glob.glob("PINS/*")
label_names = [os.path.split(x)[1] for x in paths]

print("Total labels: {}".format(len(label_names)))
print("Labels: {}".format((label_names)))
Total labels: 100
Labels: ['_MARK_ZUCKERBERG', '_AMBER_HEARD_FACE', '_MARTIN_STARR', '_BARBARA_PALVIN_FACE', '_JASON_MOMOA', '_PEDRO_ALONSO', '_JASON_ISAACS', '_CHRIS_PRATT', '_JIM_PARSONS', '_EMILIA_CLARKE', '_SOPHIE_TURNER', '_AMAURY_NOLASCO', '_WILLA_HOLLAND', '_PAUL_RUDD', '_BRYAN_CRANSTON', '_LINDSEY_MORGAN_FACE', '_RAMI_MELEK', '_JESSE_EISENBERG', '_SCARLETT_JOHANSSON', '_KRISTEN_STEWART_FACE', '_DUA_LIPA_FACE', '_ROBIN_TAYLOR', '_PETER_DINKLAGE', '_MORENA_BACCARIN', '_BRIE_LARSON', '_JEREMY_RENNER', '_MORGAN_FREEMAN', '_TOM_CAVANAGH', '_EMMA_STONE', '_TAYLOR_SWIFT', '_ALVARO_MORTE', '_AARON_PAUL', '_BRIT_MARLING', '_CHANCE_PERDOMO', '_TOM_HOLLAND_FACE', '_RIHANNA', '_ANNA_GUNN', '_ALEXANDRA_DADDARIO', '_ELIZA_TAYLOR', '_DAVE_FRANCO', '_CAMERON_MONAGHAN', '_KUMAIL_NANJIANI', '_BELLAMY_BLAKE_FACE', '_BILL_GATES', '_DRAKE', '_ELON_MUSK', '_GAL_GADOT_FACE', '_SUNDAR_PICHAI', '_ROBERT_KNEPPER', '_MIGUEL_HERRAN', '_MELISSA_BENOIT', '_MILLIE_BOBBY_BROWN', '_GRANT_GUSTIN_FACE', '_DOMINIC_PURCELL', '_EMILY_BETT_RICKARDS', '_EMMA_WATSON_FACE', '_ROBERT_DOWNEY_JR_FACE', '_BETSY_BRANDT', '_JEFF_BEZOS', '_WENTWORTH_MILLER', '_URSULA_CORBERO', '_SARAH_WAYNE_CALLIES', '_SHAKIRA', '_KRYSTEN_RITTER', '_BENEDICT_CUMBERBATCH', '_JOSH_RADNOR', '_CHADWICK_BOSEMAN_FACE', '_HENRY_CAVIL', '_SEBASTIAN_STAN', '_DANIELLE_PANABAKER', '_MARK_RUFFALO', '_KIT_HARINGTON', '_MIKE_COLTER', '_GWYNETH_PALTROW', '_DAVID_MAZOUZ', '_WILLIAM_FICHTNER', '_COBIE_SMULDERS', '_NATALIE_PORTMAN_', '_CHRIS_EVANS', '_ELIZABETH_OLSEN_FACE', '_JON_BERNTHAL', '_NEIL_PATRICK_HARRIS', '_MARGOT_ROBBIE_FACE', '_MAISIE_WILLIAMS', '_DWAYNE_JOHNSON', '_STEPHEN_AMELL', '_ZENDAYA', '_CAITY_LOTZ', '_KIERNAN_SHIPKA_', '_ANNE_HATHAWAY', '_RJ_MITTE', '_SEAN_PERTWEE', '_MARIA_PEDRAZA', '_SELENA_GOMEZ', '_AMANDA_CREW', '_ALYCIA_DEBNAM_CAREY_FACE', '_BRENTON_THWAITES', '_RYAN_REYNOLDS', '_THOMAS_MIDDLEDITCH', '_TATI_GABRIELLE']
In [10]:
# Saving the metadata in a dataframe

df = pd.DataFrame(columns=['Person', 'No. of Images','File Path', 'Files'])

folders = [x[0] for x in os.walk(directory)]

for subfolders in folders:
  fileList = os.listdir(subfolders)
  filePath = os.path.abspath(subfolders)
  person = (subfolders)[len(directory):]
  df = df.append({'Person': person, 'No. of Images': len(fileList), 'File Path': filePath , 'Files': fileList}, ignore_index=True)

df
Out[10]:
Person No. of Images File Path Files
0 100 /content/drive/MyDrive/PINS [_MARK_ZUCKERBERG, _AMBER_HEARD_FACE, _MARTIN_...
1 /_MARK_ZUCKERBERG 62 /content/drive/MyDrive/PINS/_MARK_ZUCKERBERG [mark_zuckerberg1.jpg, mark_zuckerberg50.jpg, ...
2 /_AMBER_HEARD_FACE 151 /content/drive/MyDrive/PINS/_AMBER_HEARD_FACE [amber_heard_face115.jpg, amber_heard_face101....
3 /_MARTIN_STARR 48 /content/drive/MyDrive/PINS/_MARTIN_STARR [Martin_Starr31.jpg, Martin_Starr25.jpg, Marti...
4 /_BARBARA_PALVIN_FACE 142 /content/drive/MyDrive/PINS/_BARBARA_PALVIN_FACE [barbara_palvin_face1.jpg, barbara_palvin_face...
... ... ... ... ...
96 /_ALYCIA_DEBNAM_CAREY_FACE 0 /content/drive/MyDrive/PINS/_ALYCIA_DEBNAM_CAR... []
97 /_BRENTON_THWAITES 0 /content/drive/MyDrive/PINS/_BRENTON_THWAITES []
98 /_RYAN_REYNOLDS 0 /content/drive/MyDrive/PINS/_RYAN_REYNOLDS []
99 /_THOMAS_MIDDLEDITCH 0 /content/drive/MyDrive/PINS/_THOMAS_MIDDLEDITCH []
100 /_TATI_GABRIELLE 0 /content/drive/MyDrive/PINS/_TATI_GABRIELLE []

101 rows × 4 columns

In [16]:
import glob
In [17]:
from mpl_toolkits.axes_grid1 import ImageGrid
from glob import glob

# 5x5 grid of sample images: 5 random images from each of the first 5 labels
fig = plt.figure(1, (15, 15))
grid = ImageGrid(fig, 111, nrows_ncols=(5, 5), axes_pad=0.1)

n_img_class = []
counter = 0
for idx, label in enumerate(label_names[:5]):
    # 'glob' was imported as a function above, so call it directly
    paths = glob(os.path.join("PINS/", label, "*.jpg"))
    n_img_class.append([label, len(paths)])
    perm = np.random.choice(len(paths), size=5)

    title = True
    for ii in perm:
        ax = grid[counter]
        if title:
            ax.text(5, 20, label, color="white", verticalalignment="center")
            title = False
        img = cv2.imread(paths[ii])
        img = cv2.resize(img, (150, 150))
        ax.axis("off")
        ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        counter += 1

plt.show()
In [18]:
# Creating the features and target sets

# save the path into a variable so it can be reused later
TRAIN_FOLDER= "/content/drive/MyDrive/PINS"

from glob import glob
images = []
classes=[]
missing=0
for class_folder_name in os.listdir(TRAIN_FOLDER):
    class_folder_path = os.path.join(TRAIN_FOLDER, class_folder_name)
    class_label = class_folder_name
    
    for image_path in glob(os.path.join(class_folder_path, "*.jpg")):
        image_bgr = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image_bgr is None:  # skip files that OpenCV could not read
          missing += 1
          continue
        images.append(image_bgr)
        
        classes.append(class_label)
In [19]:
(classes)
Out[19]:
['_MARK_ZUCKERBERG',
 '_MARK_ZUCKERBERG',
 '_MARK_ZUCKERBERG',
 ...
 '_AMBER_HEARD_FACE',
 ...
 '_BARBARA_PALVIN_FACE',
 ...
 '_EMILIA_CLARKE',
 ...]
In [20]:
len(images)
Out[20]:
4396
In [21]:
# Performing preprocessing for all the images

# resize image
def resize_images(img):

  img = np.array(img).astype(np.uint8)
  #print(img.dtype)
  res = cv2.resize(img,(224,224), interpolation = cv2.INTER_CUBIC)
  return res

#save resized images into images.
images = [resize_images(img) for img in images]
In [22]:
images[0].shape
Out[22]:
(224, 224, 3)
In [23]:
# Displaying the shapes of images and classes

#see number of images in each label
images = np.array(images)
classes = np.array(classes)
print("images shape: ", images.shape)
print("classes shape: ", classes.shape)
images shape:  (4396, 224, 224, 3)
classes shape:  (4396,)
In [24]:
#Viewing the distribution of number of images in each class
import matplotlib as mp

values =[]
labels=[]
for label in set(classes):
  values.append(len(images[classes == label]))
  labels.append(label)

# sort labels and values together so each bar keeps its own image count
labels, values = zip(*sorted(zip(labels, values), key=lambda lv: lv[1]))

code=np.arange(0,13,1)
data_normalizer = mp.colors.Normalize()
color_map = mp.colors.LinearSegmentedColormap(
    "my_map",
    {
        "red": [(0, 1.0, 1.0),
                (1.0, .5, .5)],
        "green": [(0, 0.5, 0.5),
                  (1.0, 0, 0)],
        "blue": [(0, 0.50, 0.5),
                 (1.0, 0, 0)]
    }
)

fig, ax = plt.subplots(figsize=(150,50))
ax.bar(labels, values, color=color_map(data_normalizer(code)))
plt.xticks(fontsize = 8)
plt.yticks(fontsize = 10)
plt.xlabel("Species", fontsize = 14)
plt.ylabel("Number of images", fontsize = 14)
plt.title("Distribution of images in each class", fontsize=14)
plt.show()
In [25]:
class IdentityMetadata():
    def __init__(self, base, name, file):
        # dataset base directory
        self.base = base
        # identity name
        self.name = name
        # image file name
        self.file = file

    def __repr__(self):
        return self.image_path()

    def image_path(self):
        return os.path.join(self.base, self.name, self.file) 
    
def load_metadata(path):
    metadata = []
    for i in os.listdir(path):
        for f in os.listdir(os.path.join(path, i)):
            metadata.append(IdentityMetadata(path, i, f))
    return np.array(metadata)

metadata = load_metadata(directory)
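
A quick check of the metadata just created (a small illustrative snippet; the exact count depends on how many images are in the PINS folders):

In [ ]:
# Inspect the metadata entries created by load_metadata()
print("Number of metadata entries:", len(metadata))
print("Example entry:", metadata[0])        # __repr__ returns the full image path
print("Identity of the first entry:", metadata[0].name)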
In [26]:
! pip install git+https://github.com/rcmalli/keras-vggface.git
!pip install keras_applications --no-deps
filename = "/usr/local/lib/python3.7/dist-packages/keras_vggface/models.py"
text = open(filename).read()
open(filename, "w+").write(text.replace('keras.engine.topology', 'tensorflow.keras.utils'))
import tensorflow as tf

from keras_vggface.vggface import VGGFace
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/rcmalli/keras-vggface.git
  Cloning https://github.com/rcmalli/keras-vggface.git to /tmp/pip-req-build-lqmc6mkb
  Running command git clone -q https://github.com/rcmalli/keras-vggface.git /tmp/pip-req-build-lqmc6mkb
Requirement already satisfied: numpy>=1.9.1 in /usr/local/lib/python3.7/dist-packages (from keras-vggface==0.6) (1.21.6)
Requirement already satisfied: scipy>=0.14 in /usr/local/lib/python3.7/dist-packages (from keras-vggface==0.6) (1.7.3)
Requirement already satisfied: h5py in /usr/local/lib/python3.7/dist-packages (from keras-vggface==0.6) (3.1.0)
Requirement already satisfied: pillow in /usr/local/lib/python3.7/dist-packages (from keras-vggface==0.6) (7.1.2)
Requirement already satisfied: keras in /usr/local/lib/python3.7/dist-packages (from keras-vggface==0.6) (2.8.0)
Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.7/dist-packages (from keras-vggface==0.6) (1.15.0)
Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from keras-vggface==0.6) (6.0)
Requirement already satisfied: cached-property in /usr/local/lib/python3.7/dist-packages (from h5py->keras-vggface==0.6) (1.5.2)
Building wheels for collected packages: keras-vggface
  Building wheel for keras-vggface (setup.py) ... done
  Created wheel for keras-vggface: filename=keras_vggface-0.6-py3-none-any.whl size=8325 sha256=a08f03618d48c8b257a3488070d4c812de777e8b0deed2c8814b99a930225780
  Stored in directory: /tmp/pip-ephem-wheel-cache-c8rvc7k3/wheels/08/df/86/0225d44647ab2256dbf1e006823288fe9cc86367a056e6ea2c
Successfully built keras-vggface
Installing collected packages: keras-vggface
Successfully installed keras-vggface-0.6
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting keras_applications
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
     |████████████████████████████████| 50 kB 4.7 MB/s 
Installing collected packages: keras-applications
Successfully installed keras-applications-1.0.8
In [27]:
from numpy import expand_dims
from keras_vggface.utils import preprocess_input
In [28]:
# Detecting Faces for Face Recognition

!pip install mtcnn
# confirm mtcnn was installed correctly
import mtcnn
# print version
print(mtcnn.__version__)
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mtcnn
  Downloading mtcnn-0.1.1-py3-none-any.whl (2.3 MB)
     |████████████████████████████████| 2.3 MB 8.5 MB/s 
Requirement already satisfied: keras>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from mtcnn) (2.8.0)
Requirement already satisfied: opencv-python>=4.1.0 in /usr/local/lib/python3.7/dist-packages (from mtcnn) (4.6.0.66)
Requirement already satisfied: numpy>=1.14.5 in /usr/local/lib/python3.7/dist-packages (from opencv-python>=4.1.0->mtcnn) (1.21.6)
Installing collected packages: mtcnn
Successfully installed mtcnn-0.1.1
0.1.0
In [29]:
# The function extract_face() extracts a single face (the first one detected) from an image:

# extract a single face from a given photograph
def extract_face(filename, required_size=(224, 224)):
    # load image from file (OpenCV reads BGR; MTCNN expects RGB)
    pixels = cv2.cvtColor(cv2.imread(filename), cv2.COLOR_BGR2RGB)

    # create the detector, using default weights (MTCNN class from the mtcnn package)
    detector = mtcnn.MTCNN()

    # detect faces in the image
    results = detector.detect_faces(pixels)

    # extract the bounding box from the first face
    x1, y1, width, height = results[0]['box']
    x2, y2 = x1 + width, y1 + height

    # extract the face
    face = pixels[y1:y2, x1:x2]

    # resize pixels to the model size
    image = Image.fromarray(face)
    image = image.resize(required_size)
    face_array = asarray(image)
    return face_array
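
As a quick check, the function can be run on a single photograph; the path below is the same sample image used later in this notebook, and the cropped face is displayed with matplotlib (an illustrative usage sketch):

In [ ]:
# Illustrative usage of extract_face() on one sample image from the PINS folders
sample_face = extract_face('/content/drive/MyDrive/PINS/_PETER_DINKLAGE/Peter_Dinklage100_142.jpg')
print(sample_face.shape)   # (224, 224, 3)
plt.imshow(sample_face)
plt.axis("off")
plt.show()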
In [30]:
pip install keras_vggface
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Requirement already satisfied: keras_vggface in /usr/local/lib/python3.7/dist-packages (0.6)
Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.7/dist-packages (from keras_vggface) (1.15.0)
Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from keras_vggface) (6.0)
Requirement already satisfied: scipy>=0.14 in /usr/local/lib/python3.7/dist-packages (from keras_vggface) (1.7.3)
Requirement already satisfied: keras in /usr/local/lib/python3.7/dist-packages (from keras_vggface) (2.8.0)
Requirement already satisfied: pillow in /usr/local/lib/python3.7/dist-packages (from keras_vggface) (7.1.2)
Requirement already satisfied: numpy>=1.9.1 in /usr/local/lib/python3.7/dist-packages (from keras_vggface) (1.21.6)
Requirement already satisfied: h5py in /usr/local/lib/python3.7/dist-packages (from keras_vggface) (3.1.0)
Requirement already satisfied: cached-property in /usr/local/lib/python3.7/dist-packages (from h5py->keras_vggface) (1.5.2)
In [31]:
# check version of keras_vggface

import keras_vggface

# print version
print(keras_vggface.__version__)
0.6
In [32]:
# Loading the pretrained vggface weights

weights_file = '/content/drive/MyDrive/vgg_face_weights.h5'
In [33]:
#import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import ZeroPadding2D,Convolution2D,MaxPooling2D
from tensorflow.keras.layers import Dense,Dropout,Softmax,Flatten,Activation,BatchNormalization
from tensorflow.keras.preprocessing.image import load_img,img_to_array
from tensorflow.keras.applications.imagenet_utils import preprocess_input
import tensorflow.keras.backend as K

# Define VGG_FACE_MODEL architecture
model = Sequential()
model.add(ZeroPadding2D((1,1),input_shape=(224,224, 3)))
model.add(Convolution2D(64, (3, 3), activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2,2), strides=(2,2)))

model.add(ZeroPadding2D((1,1)))	
model.add(Convolution2D(128, (3, 3), activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2,2), strides=(2,2)))

model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(256, (3, 3), activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(256, (3, 3), activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(256, (3, 3), activation='relu'))
model.add(MaxPooling2D((2,2), strides=(2,2)))

model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(512, (3, 3), activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(512, (3, 3), activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(512, (3, 3), activation='relu'))
model.add(MaxPooling2D((2,2), strides=(2,2)))

model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(512, (3, 3), activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(512, (3, 3), activation='relu'))
model.add(ZeroPadding2D((1,1)))
model.add(Convolution2D(512, (3, 3), activation='relu'))
model.add(MaxPooling2D((2,2), strides=(2,2)))

model.add(Convolution2D(4096, (7, 7), activation='relu'))
model.add(Dropout(0.5))
model.add(Convolution2D(4096, (1, 1), activation='relu'))
model.add(Dropout(0.5))
model.add(Convolution2D(2622, (1, 1)))
model.add(Flatten())
model.add(Activation('softmax'))

# Load VGG Face model weights
model.load_weights(weights_file)
In [34]:
# Remove the final Softmax layer and keep the model up to the last Flatten layer, whose output is a 2622-unit embedding
vgg_face_descriptor = Model(inputs=model.layers[0].input, outputs=model.layers[-2].output)
In [41]:
cd '/content/drive/MyDrive/PINS/_PETER_DINKLAGE'
/content/drive/MyDrive/PINS/_PETER_DINKLAGE
In [42]:
# Viewing a sample image

sample_pic = '/content/drive/MyDrive/PINS/_PETER_DINKLAGE/Peter_Dinklage100_142.jpg'
# load image from file
image = plt.imread(sample_pic)
plt.imshow(image)
Out[42]:
<matplotlib.image.AxesImage at 0x7f40e346d890>

Next, create an MTCNN() object, assign it to the detector variable, and use its .detect_faces() method to detect the faces in an image. Let’s see what it returns:

In [43]:
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
In [44]:
from mtcnn.mtcnn import MTCNN

# create the detector, using default weights
detector = MTCNN()
# detect faces in the image
faces = detector.detect_faces(image)
for face in faces:
  print(face)
{'box': [50, 33, 192, 227], 'confidence': 0.9992461204528809, 'keypoints': {'left_eye': (103, 105), 'right_eye': (192, 106), 'nose': (150, 156), 'mouth_left': (111, 204), 'mouth_right': (186, 205)}}

For every detected face, a Python dictionary with three keys is returned. The box key holds the bounding box of the face as four values: the x- and y-coordinates of the top-left corner, followed by the width and height of the rectangle. The confidence key gives the detection probability, and the keypoints key holds a nested dictionary with the coordinates of the detected facial landmarks (eyes, nose, and mouth corners):
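For example, the first detection returned above can be unpacked like this (a small sketch reusing the faces list from the previous cell):

# unpack the first detection returned by detector.detect_faces()
face = faces[0]
x, y, width, height = face['box']     # top-left corner plus box size
confidence = face['confidence']       # detection probability, close to 1.0 here
keypoints = face['keypoints']         # dict of facial landmark coordinates

print(f"Box at ({x}, {y}), size {width}x{height}, confidence {confidence:.3f}")
print(f"Left eye at {keypoints['left_eye']}, nose at {keypoints['nose']}")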

Highlighting Faces in an Image

Now that we’ve successfully detected a face, let’s draw a rectangle over it in the image to verify that the detection is correct.

To draw a rectangle, import the Rectangle object from matplotlib.patches:

In [45]:
from matplotlib.patches import Rectangle

Let’s define a function highlight_faces to first display the image and then draw rectangles over faces that were detected. First, read the image through imread() and plot it through imshow(). For each face that was detected, draw a rectangle using the Rectangle() class.

Finally, display the image and the rectangles using the .show() method. If you’re using Jupyter notebooks, you may use the %matplotlib inline magic command to show plots inline:

In [46]:
def highlight_faces(image_path, faces):
    # display image
    image = plt.imread(image_path)
    plt.imshow(image)

    ax = plt.gca()

    # for each face, draw a rectangle based on coordinates
    for face in faces:
        x, y, width, height = face['box']
        face_border = Rectangle((x, y), width, height,
                          fill=False, color='red')
        ax.add_patch(face_border)
    plt.show()
In [48]:
highlight_faces('/content/drive/MyDrive/PINS/_PETER_DINKLAGE/Peter_Dinklage100_142.jpg', faces)
In [50]:
cd '/content/drive/MyDrive/PINS/_MORGAN_FREEMAN'
/content/drive/MyDrive/PINS/_MORGAN_FREEMAN
In [51]:
image = plt.imread('/content/drive/MyDrive/PINS/_MORGAN_FREEMAN/Morgan_Freeman116_580.jpg')
faces = detector.detect_faces(image)

highlight_faces('/content/drive/MyDrive/PINS/_MORGAN_FREEMAN/Morgan_Freeman116_580.jpg', faces)

In these two images, you can see that the MTCNN algorithm correctly detects faces. Let’s now extract this face from the image to perform further analysis on it.

Extracting Face for Further Analysis

At this point, you know the coordinates of the faces from the detector. Extracting a face is then simply a matter of slicing the image array with those coordinates. However, the VGG Face model that we use needs the faces to be resized to 224 x 224 pixels, so we’ll use the PIL library to resize the extracted crops.

In [52]:
# The function extract_face() extracts the first detected face from an image:

# extract a single face from a given photograph
def extract_face(filename, required_size=(224, 224)):
	# load image from file
	pixels = cv2.imread(filename)
 
	# create the detector, using default weights
	detector = MTCNN()
 
	# detect faces in the image
	results = detector.detect_faces(pixels)
 
	# extract the bounding box from the first face
	x1, y1, width, height = results[0]['box']
	x2, y2 = x1 + width, y1 + height

	# extract the face
	face = pixels[y1:y2, x1:x2]

	# resize pixels to the model size
	image = Image.fromarray(face)
	image = image.resize(required_size)
	face_array = asarray(image)
	return face_array
In [55]:
cd '/content/drive/MyDrive/PINS/_ELIZA_TAYLOR'
/content/drive/MyDrive/PINS/_ELIZA_TAYLOR
In [56]:
# import Image, ImageTk
# load the photo and extract the face
pixels = extract_face('/content/drive/MyDrive/PINS/_ELIZA_TAYLOR/eliza_taylor101.jpg')

# plot the extracted face
plt.imshow(pixels)

# show the plot
plt.show()
In [57]:
pip install keras_vggface
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Requirement already satisfied: keras_vggface in /usr/local/lib/python3.7/dist-packages (0.6)
Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from keras_vggface) (6.0)
Requirement already satisfied: pillow in /usr/local/lib/python3.7/dist-packages (from keras_vggface) (7.1.2)
Requirement already satisfied: numpy>=1.9.1 in /usr/local/lib/python3.7/dist-packages (from keras_vggface) (1.21.6)
Requirement already satisfied: keras in /usr/local/lib/python3.7/dist-packages (from keras_vggface) (2.8.0)
Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.7/dist-packages (from keras_vggface) (1.15.0)
Requirement already satisfied: scipy>=0.14 in /usr/local/lib/python3.7/dist-packages (from keras_vggface) (1.7.3)
Requirement already satisfied: h5py in /usr/local/lib/python3.7/dist-packages (from keras_vggface) (3.1.0)
Requirement already satisfied: cached-property in /usr/local/lib/python3.7/dist-packages (from h5py->keras_vggface) (1.5.2)
In [58]:
pip install keras_applications
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Requirement already satisfied: keras_applications in /usr/local/lib/python3.7/dist-packages (1.0.8)
Requirement already satisfied: numpy>=1.9.1 in /usr/local/lib/python3.7/dist-packages (from keras_applications) (1.21.6)
Requirement already satisfied: h5py in /usr/local/lib/python3.7/dist-packages (from keras_applications) (3.1.0)
Requirement already satisfied: cached-property in /usr/local/lib/python3.7/dist-packages (from h5py->keras_applications) (1.5.2)
In [60]:
# Performing image preprocessing before the face is fed into the VGG Face model

from numpy import expand_dims
from keras_vggface.utils import preprocess_input

# load the photo and extract the face
pixels = extract_face('/content/drive/MyDrive/PINS/_ELIZA_TAYLOR/eliza_taylor101.jpg')

# convert one face into samples
pixels = pixels.astype('float32')
samples = expand_dims(pixels, axis=0)

# prepare the face for the model, e.g. center pixels
samples = preprocess_input(samples, version=2)
In [61]:
# Predicting the label of the sample image

from keras_vggface.utils import decode_predictions

# perform prediction
yhat = vgg_face_descriptor.predict(samples)

# convert prediction into names
results = decode_predictions(yhat)

# display most likely results
for result in results[0]:
	print('%s: %.2f%%' % (result[0], result[1]))
Downloading data from https://github.com/rcmalli/keras-vggface/releases/download/v2.0/rcmalli_vggface_labels_v1.npy
352256/346184 [==============================] - 0s 0us/step
360448/346184 [===============================] - 0s 0us/step
b'Dove_Cameron': 16.70%
b'Claudia_Lee': 14.98%
b'Cassi_Thomson': 14.27%
b'Miranda_Richardson': 12.64%
b'Jenny_Wade': 12.03%

The top prediction here is Dove_Cameron at only 16.70%, so the model does not identify the face (Eliza Taylor) correctly, and none of the candidates is predicted with high confidence.

Performing Face Verification With VGGFace2

A VGGFace2 model can be used for face verification.

This involves calculating a face embedding for a new given face and comparing the embedding to the embedding for the single example of the face known to the system.

A face embedding is a vector that represents the features extracted from the face. This can then be compared with the vectors generated for other faces. For example, another vector that is close (by some measure) may be the same person, whereas another vector that is far (by some measure) may be a different person.

Typical measures such as Euclidean distance and Cosine distance are calculated between two embeddings and faces are said to match or verify if the distance is below a predefined threshold, often tuned for a specific dataset or application.
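As a tiny numerical illustration of these distance measures (made-up 3-dimensional vectors, not real face embeddings):

import numpy as np
from scipy.spatial.distance import cosine, euclidean

a = np.array([0.9, 0.1, 0.2])   # hypothetical embedding of the known face
b = np.array([0.8, 0.2, 0.1])   # hypothetical embedding of a candidate face

print(cosine(a, b))      # ~0.01: a small cosine distance suggests the same person
print(euclidean(a, b))   # ~0.17: the Euclidean alternative needs a different threshold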

To compute embeddings we need a model without the classification head. The keras_vggface package can load such a model by setting the ‘include_top‘ argument to ‘False‘, specifying the input shape via ‘input_shape‘, and setting ‘pooling‘ to ‘avg‘ so that the filter maps at the output end of the model are reduced to a vector using global average pooling. In this notebook, however, we reuse the vgg_face_descriptor defined earlier, which outputs the 2622-dimensional activations of the last Flatten layer as the face embedding.
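For reference, this is how the keras_vggface package exposes that option (a sketch only, not executed in this notebook):

# Alternative, not run here: a headless VGGFace model whose global-average-pooled
# output serves directly as the face embedding
from keras_vggface.vggface import VGGFace

embedding_model = VGGFace(model='resnet50', include_top=False,
                          input_shape=(224, 224, 3), pooling='avg')
# embedding_model.predict(samples) would then return one embedding vector per face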

In [62]:
def get_embeddings(filenames):
	# extract faces
	faces = [extract_face(f) for f in filenames]
	# convert into an array of samples
	samples = asarray(faces, 'float32')
	# prepare the face for the model, e.g. center pixels
	samples = preprocess_input(samples, version=2)

	# perform prediction
	yhat = vgg_face_descriptor.predict(samples)
	return yhat

We can take the photograph of Eliza Taylor used previously (eliza_taylor101.jpg) as our definition of her identity by calculating and storing the face embedding for the face in that photograph.

We can then calculate embeddings for faces in other photographs of Eliza Taylor and test whether we can effectively verify her identity. We can also use faces from photographs of other people to confirm that they are not verified as Eliza Taylor.

Verification can be performed by calculating the cosine distance between the embedding for the known identity and the embeddings of candidate faces. This can be achieved using SciPy’s cosine() function, which returns 1 minus the cosine similarity: identical directions score 0.0, and larger values indicate less similar faces. A common cut-off value for face identity is between 0.4 and 0.6, such as 0.5, although this should be tuned for the application.

The is_match() function below implements this, calculating the distance between two embeddings and interpreting the result.

In [63]:
# Defining functions to measure the distance between two face embeddings to see how similar they are

from scipy.spatial.distance import cosine

# determine if a candidate face is a match for a known face
def is_match(known_embedding, candidate_embedding, thresh=0.5):
	# calculate cosine distance between embeddings
	score = cosine(known_embedding, candidate_embedding)
	if score <= thresh:
		print('>face is a Match (%.3f <= %.3f)' % (score, thresh))
	else:
		print('>face is NOT a Match (%.3f > %.3f)' % (score, thresh))
In [64]:
# Euclidean (L2) distance between two embedding vectors
def L2Norm(H1, H2):
    distance = 0
    for i in range(len(H1)):
        distance += np.square(H1[i] - H2[i])
    return np.sqrt(distance)
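The helper functions above are defined but not exercised in the notebook, so here is a minimal usage sketch (file paths reused from earlier cells; expected behaviour rather than recorded output):

# verify identities with the embedding and distance helpers defined above
filenames = ['/content/drive/MyDrive/PINS/_ELIZA_TAYLOR/eliza_taylor101.jpg',
             '/content/drive/MyDrive/PINS/_ELIZA_TAYLOR/eliza_taylor65.jpg',
             '/content/drive/MyDrive/PINS/_MORGAN_FREEMAN/Morgan_Freeman116_580.jpg']
embeddings = get_embeddings(filenames)

# same person: expect a small cosine distance (<= 0.5)
is_match(embeddings[0], embeddings[1])
# different people: expect a larger cosine distance (> 0.5)
is_match(embeddings[0], embeddings[2])
# the L2Norm helper gives the equivalent Euclidean distance
print(L2Norm(embeddings[0], embeddings[1]))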
In [65]:
# Encoding the labels

from sklearn.preprocessing import LabelEncoder

enc = LabelEncoder()
y = enc.fit_transform(classes)

num_classes = len(np.unique(y))
num_classes
Out[65]:
40

Splitting the data into training, validation and testing sets

training set : 70% of the data (of which 25% is then held out as a validation set)

testing set : 30% of the data

In [66]:
X_train, X_test, y_train, y_test = train_test_split(images, y, test_size=0.3, random_state=50)
In [67]:
X_train, X_val, y_train, y_val  = train_test_split(X_train, y_train, test_size=0.25, random_state=1)
In [68]:
print(f"#### Shapes of Train, Test and Validation sets are: {X_train.shape},{X_test.shape},{X_val.shape}")
#### Shapes of Train, Test and Validation sets are: (2307, 224, 224, 3),(1319, 224, 224, 3),(770, 224, 224, 3)
In [69]:
print(f"#### Shapes of Train, Test and Validation sets of Target are: {y_train.shape},{y_test.shape},{y_val.shape}")
#### Shapes of Train, Test and Validation sets of Target are: (2307,),(1319,),(770,)

Classification using Supervised Learning Techniques

Support Vector Machines

Each 224 x 224 x 3 image has 150,528 pixel values (50,176 per colour channel), while the training set contains only 2,307 images. To use an SVM, our model of choice, the number of features needs to be reduced.

PCA is a way of linearly transforming the data such that most of the information in the data is contained within a smaller number of features called components.

In [70]:
def flatten(X):
  Xr, Xg, Xb = [],[],[]
  for samples in X:
    r, g, b = cv2.split(samples)
    Xr.append(r.flatten())
    Xg.append(g.flatten())
    Xb.append(b.flatten())
  Xr = np.array(Xr)
  Xg = np.array(Xg)
  Xb = np.array(Xb)
  return (Xr, Xg, Xb)
In [71]:
X_train_r, X_train_g, X_train_b = flatten(X_train)
X_test_r, X_test_g, X_test_b = flatten(X_test)
X_val_r, X_val_g, X_val_b = flatten(X_val)
In [72]:
# Note: only the last expression in a cell is echoed, so the output below shows just the validation shapes
X_train_r.shape, X_train_g.shape, X_train_b.shape,
X_test_r.shape, X_test_g.shape, X_test_b.shape,
X_val_r.shape, X_val_g.shape, X_val_b.shape
Out[72]:
((770, 50176), (770, 50176), (770, 50176))
In [73]:
from sklearn.decomposition import PCA as RandomizedPCA

n_components = 500
X_train_pca_r = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train_r)
X_train_pca_g = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train_g)
X_train_pca_b = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train_b)

X_test_pca_r = RandomizedPCA(n_components=n_components, whiten=True).fit(X_test_r)
X_test_pca_g = RandomizedPCA(n_components=n_components, whiten=True).fit(X_test_g)
X_test_pca_b = RandomizedPCA(n_components=n_components, whiten=True).fit(X_test_b)

X_val_pca_r = RandomizedPCA(n_components=n_components, whiten=True).fit(X_val_r)
X_val_pca_g = RandomizedPCA(n_components=n_components, whiten=True).fit(X_val_g)
X_val_pca_b = RandomizedPCA(n_components=n_components, whiten=True).fit(X_val_b)
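Note that fitting a separate PCA on the train, test and validation splits gives each split its own components, so the transformed features do not live in a common space (and the prediction cell further below transforms the new image with the train-fitted PCA). A more conventional approach, sketched here with new variable names so the cells above are untouched, fits PCA on the training data only and reuses it for every split:

# conventional alternative: fit one PCA per channel on the training data only
pca_r = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train_r)
pca_g = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train_g)
pca_b = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train_b)

# transform every split with the same train-fitted components
Xr_test_pca_alt = pca_r.transform(X_test_r)
Xg_test_pca_alt = pca_g.transform(X_test_g)
Xb_test_pca_alt = pca_b.transform(X_test_b)
Xr_val_pca_alt = pca_r.transform(X_val_r)
Xg_val_pca_alt = pca_g.transform(X_val_g)
Xb_val_pca_alt = pca_b.transform(X_val_b)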
In [74]:
Xr_train_pca = X_train_pca_r.transform(X_train_r)
Xg_train_pca = X_train_pca_g.transform(X_train_g)
Xb_train_pca = X_train_pca_b.transform(X_train_b)

Xr_test_pca = X_test_pca_r.transform(X_test_r)
Xg_test_pca = X_test_pca_g.transform(X_test_g)
Xb_test_pca = X_test_pca_b.transform(X_test_b)

Xr_val_pca = X_val_pca_r.transform(X_val_r)
Xg_val_pca = X_val_pca_g.transform(X_val_g)
Xb_val_pca = X_val_pca_b.transform(X_val_b)
In [75]:
X_train_pca_r.explained_variance_ratio_.sum(), X_train_pca_g.explained_variance_ratio_.sum(), X_train_pca_b.explained_variance_ratio_.sum(),
X_test_pca_r.explained_variance_ratio_.sum(), X_test_pca_g.explained_variance_ratio_.sum(), X_test_pca_b.explained_variance_ratio_.sum(),
X_val_pca_r.explained_variance_ratio_.sum(), X_val_pca_g.explained_variance_ratio_.sum(), X_val_pca_b.explained_variance_ratio_.sum()
Out[75]:
(0.9875373195929534, 0.9876502317957323, 0.9889791615498413)
In [76]:
X_train_pca = np.concatenate([Xr_train_pca,Xg_train_pca,Xb_train_pca], axis=1)
X_test_pca = np.concatenate([Xr_test_pca,Xg_test_pca,Xb_test_pca], axis=1)
X_val_pca = np.concatenate([Xr_val_pca,Xg_val_pca,Xb_val_pca], axis=1)
In [77]:
X_train_pca.shape, y_train.shape,
X_test_pca.shape, y_test.shape,
X_val_pca.shape, y_val.shape
Out[77]:
((770, 1500), (770,))
In [78]:
from sklearn.svm import SVC
from sklearn.model_selection import learning_curve, GridSearchCV

param_grid = [
  {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
  {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
 ]
svc = SVC()
clf = GridSearchCV(svc, param_grid, verbose=10, n_jobs=-1)
clf.fit(X_train_pca, y_train)
Fitting 5 folds for each of 12 candidates, totalling 60 fits
Out[78]:
GridSearchCV(estimator=SVC(), n_jobs=-1,
             param_grid=[{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
                         {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
                          'kernel': ['rbf']}],
             verbose=10)
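The cross-validated search results can be inspected before scoring (not shown in the original run):

# best hyper-parameters and their mean cross-validated accuracy
print(clf.best_params_)
print(clf.best_score_)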
In [79]:
X_train.shape[0]
Out[79]:
2307
In [80]:
y_train.shape[0]
Out[80]:
2307
In [81]:
svm_score = clf.score(X_val_pca, y_val)
In [82]:
from sklearn import svm, metrics

y_pred = clf.predict(X_test_pca)
svm_accuracy = metrics.accuracy_score(y_test, y_pred)
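The scores computed above only surface in the summary table at the end, so a quick report here helps diagnose the very low accuracy (a sketch; output not recorded):

# report the validation score, the test accuracy and a per-class breakdown
print(f"Validation accuracy: {svm_score:.3f}")
print(f"Test accuracy: {svm_accuracy:.3f}")
print(metrics.classification_report(y_test, y_pred, zero_division=0))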
In [89]:
# Prediction using SVM

image = '/content/drive/MyDrive/PINS/_ELIZA_TAYLOR/eliza_taylor65.jpg'
img_pred = plt.imread(image)
img_pred_resize = cv2.resize(img_pred, (224, 224), interpolation=cv2.INTER_CUBIC)

# split the resized image into its R, G and B channels for the per-channel PCA
X_pred_r, X_pred_g, X_pred_b = cv2.split(img_pred_resize)
In [90]:
X_pred_pca_r = X_train_pca_r.transform(np.expand_dims(X_pred_r.flatten(), axis=0))
X_pred_pca_g = X_train_pca_g.transform(np.expand_dims(X_pred_g.flatten(), axis=0))
X_pred_pca_b = X_train_pca_b.transform(np.expand_dims(X_pred_b.flatten(), axis=0))

X_pred_pca = np.concatenate([X_pred_pca_r,X_pred_pca_g,X_pred_pca_b], axis=1)
In [91]:
pred_svm = clf.predict(X_pred_pca)[0]

print(f"The predicted label is:'{pred_svm}'")
The predicted label is:'14'
In [92]:
example_identity = enc.inverse_transform(np.ravel(pred_svm))[0]

plt.imshow(img_pred_resize)
plt.title(f'Recognized as {example_identity}');
In [93]:
model_performance = pd.DataFrame(columns=['Model', 'Accuracy', 'Predicted Person'])

model_performance = model_performance.append({'Model':'SVM',
                                              'Accuracy': svm_accuracy,
                                              'Predicted Person': example_identity                                      
                                              }, ignore_index=True)

model_performance
Out[93]:
  Model  Accuracy Predicted Person
0   SVM  0.040182    _ELIZA_TAYLOR